#Replication files for Generalizability of IR experiments beyond the U.S.
#Create Figure 2: Steps in selecting countries

#Packages
rm(list=ls())
library("stringr")
library("tidyverse")
library("haven")
library("lfe")
library("reshape2")
library("naniar")
library("fixest")
library("Hmisc")
library("dplyr")
library("estimatr")
library("stargazer")
library("texreg")
library("countrycode")
library("RColorBrewer")
library("ggpubr")

#load cross country dataset
data<-read_csv(file = "data/cross_national/cross_national.csv")

#create variable for democracies based on polity score and freedom house
data$democracy<-ifelse(data$polity2>5,"Democracy","Non-Democracy")

#code democracy based on freedom house for missing polity values (PF or F)
data$democracy<-ifelse(is.na(data$democracy) & data$freedom_house==1,"Democracy",data$democracy)
data$democracy<-ifelse(is.na(data$democracy) & data$freedom_house==0,"Non-Democracy",data$democracy)

# load map
world_map <- map_data("world")

#rename some regions that appear once in `data`
world_map$region<-ifelse(world_map$region=="Barbuda","Antigua",world_map$region)
world_map$region<-ifelse(world_map$region=="American Samoa","Samoa",world_map$region)
world_map$region<-ifelse(world_map$region=="French Guiana","Guyana",world_map$region)

# Join data and map
count_map <- left_join(world_map,data, by = c("region"))

# plot map for democracies -- Step 1

step1<-ggplot(count_map, aes(long, lat, group = group))+
  geom_polygon(aes(fill = democracy), color = "white", 
               size =.05)+
  labs(title = "",
       fill = "",
       caption = "Step 1: Determine scope conditions")+
  scale_fill_grey(na.value = "gray95", na.translate = T) +
  theme_void(base_size = 12)+
  theme(legend.position="bottom",
        text = element_text(family = "serif"),
        plot.caption = element_text(hjust = 0.5, face = "bold"))

#note that some small islands, greenland, and antarctica are missing.

# Sort by GDP (policy importance) -- Step 2

#plot figure by region (most powerful dem by region
count_map$GDP<-ifelse(count_map$GDP=="..", NA, count_map$GDP)

count_map$gdp_by_dem<-ifelse(count_map$democracy=="Democracy",count_map$GDP, NA)
count_map$gdp_by_dem<-as.numeric(count_map$gdp_by_dem)

count_map <-
  count_map %>% 
  mutate(.,
         GDP_new = case_when(
           gdp_by_dem > 2500000000000 ~ "Over 2.5 trillion",
           gdp_by_dem > 1000000000000 & gdp_by_dem < 2500000000000 ~ "1 trillion - 2.5 trillion",
           gdp_by_dem > 500000000000 & gdp_by_dem < 1000000000000 ~ "500 billion - 1 trillion",
           gdp_by_dem > 100000000000 & gdp_by_dem < 500000000000 ~ "100 billion - 500 billion",
           gdp_by_dem < 100000000000 ~ "Under 100 billion",
           democracy == "Non-Democracy" ~ "Non-Democracy"
         ))

count_map$GDP_new <- factor(count_map$GDP_new, 
                            levels = c("Over 2.5 trillion",
                                       "1 trillion - 2.5 trillion",
                                       "500 billion - 1 trillion",
                                       "100 billion - 500 billion",
                                       "Under 100 billion",
                                       "Non-Democracy"))

step2<-ggplot(count_map, aes(long, lat, group = group))+
  geom_polygon(aes(fill = GDP_new), color = "white", 
               size =.01)+
  labs(title = "",
       fill = "",
       caption = "Step 2: Sort by policy importance (GDP)")+
  scale_fill_grey(na.value = "gray95", na.translate = T) +
  theme_void(base_size = 12)+
  theme(legend.position="bottom",
        text = element_text(family = "serif"),
        plot.caption = element_text(hjust = 0.5, face = "bold"))

# Consider uobserved moderators -- Step 3

#most powerful democracy per region
data$GDP<-as.numeric(data$GDP)
most_powerful<-data%>%
  filter(polity2>5) %>%
  group_by(Region_new,region) %>% 
  tally(GDP) %>% 
  top_n(n =1)

most_powerful<-as.vector(most_powerful$region)

count_map$top_gdp<-ifelse(count_map$region %in% most_powerful, 
                          "Most powerful Democracies \n per region (by GDP)", NA)

new_map <- count_map %>%
  group_by(region) %>%
  summarise(long = mean(long), lat = mean(lat))

#place country names in correct long. and lat.
new_map$lat<-new_map$lat-10
new_map$long<-new_map$long-15
new_map$long<-ifelse(new_map$region=="Brazil", new_map$long+42, new_map$long)
new_map$long<-ifelse(new_map$region=="India", new_map$long-5, new_map$long)
new_map$long<-ifelse(new_map$region=="Japan", new_map$long+8, new_map$long)
new_map$lat<-ifelse(new_map$region=="Germany", new_map$lat+5, new_map$lat)

new_map$new_name<-ifelse(new_map$region %in% most_powerful, 
                         new_map$region, "")

new_map<-na.omit(new_map)

step3 <-ggplot(count_map, aes(long, lat, group = group))+
  geom_polygon(aes(fill = top_gdp), color = "white", 
               size =.01)+
  geom_text(aes(long,lat, label = new_name, group = 1), 
            fontface = "bold", color = "red",
            data = new_map, size=2, show.legend = FALSE)+
  
  labs(title = "",
       fill = "",
       caption = "Step 3: Consider unobserved moderators")+
  scale_fill_grey(start = 0.2, end = 0.8, drop=FALSE, na.value = "gray",
                  limits = "Most powerful Democracies \n per region (by GDP)") +
  
  theme_void(base_size = 12)+
  theme(legend.position="bottom",
        text = element_text(family = "serif"),
        plot.caption = element_text(hjust = 0.5, face = "bold"))


# Verify variation across key moderators -- Step 4

#Selected countries

data$democracy_n<-ifelse(data$democracy=="Democracy",data$dem_years,NA)
data$govnt_spend_exp_norm_n<-ifelse(data$democracy=="Democracy",data$govnt_spend_exp_norm,NA)
data$treaties_n<-ifelse(data$democracy=="Democracy",data$treaties,NA)
data$theta_mean_norm_n<-ifelse(data$democracy=="Democracy",data$theta_mean_norm,NA)

#USA, Germany, India, Japan, Brazil, Israel,  Nigeria
data_selected<- data[ which(data$region=="USA"| 
                              data$region == "Germany"|
                              data$region == "India"|
                              data$region == "Japan"|
                              data$region == "Brazil"|
                              data$region == "Nigeria"|
                              data$region == "Israel"), ] 

keep_vars <- c("region", "govnt_spend_exp_norm",
               "dem_years",  "treaties", "theta_mean_norm")

data_selected <- data_selected[keep_vars]
data_selected <- melt(data_selected, id.vars=1)
data_selected$Value<- as.numeric(data_selected$value)

mean(data$govnt_spend_exp_norm_n,na.rm=T) #0.05

mean(data$democracy_n,na.rm=T)#47

mean(data$treaties_n,na.rm=T)#12.5

mean(data$theta_mean_norm_n,na.rm=T)#0.50


levels(data_selected$variable)
levels(data_selected$variable) <- c(
  "Military expenditure \n as proportion of government \n spending 2020",
  "Number of years \n as a democracy",
  "Number of ratified \n Human Rights treaties",
  "Physical Integrity \n Rights 2019 (normalized)")

df <- data.frame (
                  variable = c("Military expenditure \n as proportion of government \n spending 2020",
                               "Number of years \n as a democracy",
                               "Number of ratified \n Human Rights treaties",
                               "Physical Integrity \n Rights 2019 (normalized)"),
                  median_val = c(0.05, 47,12.5, 0.50)
)


step4<- ggplot(data_selected, aes(region, Value, group = 1)) +
  labs(caption = "Step 4: Verify potential variation across key moderators")+
  geom_bar(stat="identity") + 
  facet_wrap(~variable, scales = "free")+ 
  geom_hline(data= df, aes(yintercept=median_val, linetype = "All democracies mean"), color = "red") +
  scale_linetype_manual(values = 2, 
                        guide = guide_legend(override.aes = list(color = "red"))) +
  theme(
        plot.title = element_text(size = 11, face = "bold"),
        plot.caption = element_text(hjust=0.5, 
                                    face = "bold"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        panel.grid.major = element_blank(), 
        text = element_text(family = "serif"),
        panel.grid.minor = element_blank(),
        legend.position="bottom",
        legend.title = element_blank()) 

###########################
# Plot figure
###########################
  
steps<- ggarrange(step1, step2, step3, step4, ncol = 2, nrow = 2, heights = c(0.07, 0.07),
                  widths = c(0.1,0.1))


ggsave("figures/main_text/steps.pdf",steps, width = 13, height = 9)



